这次回顾第七章的内容,这一章介绍了虚拟机。

课程官网:

https://www.nand2tetris.org/

视频地址:

https://www.coursera.org/learn/build-a-computer

Part 1:课程回顾

背景介绍

在最初编写程序的时候,由于不同设备上的机器语言不同,所以就需要编写很多种编译器,这种编译模式如下:

这种模式会给开发人员带来很多额外的工作量,另一种模式是分两阶段编译,这样就产生了虚拟机的概念:

上述两个编译步骤分别对应后续的几个项目:

这周和下周的项目为将虚拟机语言编译成汇编语言的项目,该虚拟机是基于堆栈的,后续将详细介绍。

虚拟机

堆栈(Stack)是一种很简单的数据结构,其功能如下:

在后续讨论中,我们用sp表示栈顶的位置,来看一个利用堆栈实现算数运算的例子:

我们利用堆栈实现虚拟机,该虚拟机支持许多种命令,这一章先介绍算数和逻辑命令以及内存访问命令:

算数和逻辑命令

内存访问命令

Part 2:项目

项目分为Parser和CodeWriter以及主程序:

Parser

这部分的接口如下:

这部分和上一周的汇编编译器非常类似,只要按照要求稍作修改即可:

class Parser():
    #命令类型
    Arithmetic = ["add", "sub", "neg"]
    Logical = ["eq", "gt", "lt", "and", "or", "not"]
    
    def __init__(self, filename):
        self.text = []
        with open(filename) as f:
            for i in f.readlines():
                data = i.split()
                if len(data) == 0:
                    pass
                elif data[0] == "//":
                    pass
                else:
                    #防止有空格
                    data = ' '.join(data)
                    #删除每行的注释
                    data = data.replace("//", " ")
                    data = data.split()
                    
                    self.text.append(data)

        #记录当前位置
        self.i = 0
        #最大行
        self.max = len(self.text)
        #当前命令
        self.command = ""
        
    def hasMoreCommands(self):
        return self.i < self.max
    
    def advance(self):
        if self.hasMoreCommands():
            self.command = self.text[self.i]
            self.i += 1
        
    def commandType(self):
        #判断命令类型
        if self.command[0] in self.Arithmetic + self.Logical:
            return "C_ARITHMETIC"
        elif self.command[0] == "push":
            return "C_PUSH"
        elif self.command[0] == "pop":
            return "C_POP"
        elif self.command[0] == "label":
            return "C_LABEL"
        elif self.command[0] == "goto":
            return "C_GOTO"
        elif self.command[0] == "if-goto":
            return "C_IF"
        elif self.command[0] == "function":
            return "C_FUNCTION"
        elif self.command[0] == "call":
            return "C_RETURN"
        else:
            return "C_CALL"
        
    def arg1(self):
        if self.commandType() != "C_RETURN":
            if self.commandType() == "C_ARITHMETIC":
                return self.command[0]
            else:
                return self.command[1]
        else:
            return None
    
    def arg2(self):
        if self.commandType() in ["C_PUSH", "C_POP", "C_FUNCTION", "C_CALL"]:
            return int(self.command[-1])
        else:
            return None

CodeWriter

接口如下:

这部分比较难,后面详细介绍。

构造函数
def __init__(self, filename):
	#打开文件,设置文件名,文件名的格式:目录/文件名
	self.setFileName(filename)
	self.file = open(filename + ".asm", "w+")
setFileName
def setFileName(self, filename):
	#分离文件名
	name = filename.split("/")[-1]
	self.filename = name
writeArithmetic

算数运算部分涉及较多,下面分别介绍。

add,sub,and,or命令都是二元运算,实现步骤基本一致,这里以add为例,首先使用汇编语言实现

//更新栈顶位置
@SP
M=M-1
//移动至栈顶
@SP
A=M
//将b存储至D
D=M
//移动到a的位置
@SP
A=M-1
//运算
M=M+D

对应的代码为

if command == "add" or command == "sub" or command == "and" or command == "or":
	self.file.write("@SP\n")
	self.file.write("M=M-1\n")
	self.file.write("@SP\n")
	self.file.write("A=M\n")
	self.file.write("D=M\n")
	self.file.write("@SP\n")
	self.file.write("A=M-1\n")
	if command == "add":
		self.file.write("M=M+D\n")
	elif command == "sub":
		self.file.write("M=M-D\n")
	elif command == "and":
		self.file.write("M=M&D\n")
	else:
		self.file.write("M=M|D\n")

neg和not都非常简单:

elif command == "neg":
	self.file.write("@SP\n")
	self.file.write("A=M-1\n")
	self.file.write("M=-M\n")
elif command == "not":
	self.file.write("@SP\n")
	self.file.write("A=M-1\n")
	self.file.write("M=!M\n")

eq,gt,lt部分当时思考了很久,关键点是要实现分支结构,需要使用汇编语言中的标签,这里以eq为例:

//修改SP
@SP
M=M-1
D=M
@SP
A=M
D=M
@SP
A=M-1
//x-y
D=M-D
//设置默认值为False
@SP
A=M
M=0
//如果条件成立则走到SETcnt
@SETcnt
//判断
D;JEQ
//如果条件不成立则走到NEXTcnt
@NEXTcnt
0;JMP
//条件成立则将值修改为True
(SETcnt)
	@SP
	A=M-1
	M=-1
(NEXTcnt)

对应的代码为

elif command == "eq" or command == "gt" or command == "lt":
	#修改栈顶
	self.file.write("@SP\n")
	self.file.write("M=M-1\n")
	self.file.write("D=M\n")
	#存储值
	self.file.write("@SP\n")
	self.file.write("A=M\n")
	self.file.write("D=M\n")
	self.file.write("@SP\n")
	self.file.write("A=M-1\n")
	#计算x-y
	self.file.write("D=M-D\n")
	#设置为false
	self.file.write("@SP\n")
	self.file.write("A=M-1\n")
	self.file.write("M=0\n")
	self.file.write("@SET" + str(self.cnt) + "\n")
	if command == "eq":
		self.file.write("D;JEQ\n")
	elif command == "gt":
		self.file.write("D;JGT\n")
	elif command == "lt":
		self.file.write("D;JLT\n")
	#如果条件不成立则走到NEXT
	self.file.write("@NEXT" + str(self.cnt) + "\n")
	self.file.write("0;JMP\n")
	#分支
	self.file.write("(SET" + str(self.cnt) + ")\n")
	self.file.write("@SP\n")
	self.file.write("A=M-1\n")
	self.file.write("M=-1\n")
	#跳转
	self.file.write("(NEXT" + str(self.cnt) + ")\n")
	
	#计数增加
	self.cnt += 1
WritePushPop

首先回顾内存结构:

以及涉及的符号含义:

根据参数的不同,Push,Pop也分成几个部分。

push/pop local/argument/this/that i

push constant i

push/pop static i

push/pop temp i

push/pop pointer 0/1

代码如下:

def writePushPop(self, command, segment, index):
	self.file.write("//" + " ".join([command, segment, str(index)]) + "\n")
	if command == "push":
		#第一步获得地址
		if segment == "constant":
			self.file.write("@" + str(index) + "\n")
			self.file.write("D=A\n")
		elif segment == "local" or segment == "argument" or segment == "this" or segment == "that":
			self.file.write("@" + str(index) + "\n")
			self.file.write("D=A\n")
			if segment == "local":
				self.file.write("@LCL\n")
			elif segment == "argument":
				self.file.write("@ARG\n")
			elif segment == "this":
				self.file.write("@THIS\n")
			else:
				self.file.write("@THAT\n")
			self.file.write("A=D+M\n")
			self.file.write("D=M\n")
		elif segment == "temp":
			self.file.write("@" + str(5 + index) + "\n")
			self.file.write("D=M\n")
		elif segment == "pointer":
			if index == 0:
				self.file.write("@THIS\n")
			else:
				self.file.write("@THAT\n")
			self.file.write("D=M\n")
		elif segment == "static":
			self.file.write("@" + self.filename + str(index) + "\n")
			self.file.write("D=M\n")
		#栈顶
		self.file.write("@SP\n")
		self.file.write("A=M\n")
		self.file.write("M=D\n")
		#修改栈顶
		self.file.write("@SP\n")
		self.file.write("M=M+1\n")
		#增加换行
		self.file.write("\n")
	elif command == "pop":
		#获得值
		self.file.write("@SP\n")
		self.file.write("M=M-1\n")
		self.file.write("A=M\n")
		self.file.write("D=M\n")
		
		if segment == "local" or segment == "argument" or segment == "this" or segment == "that":
			#存储数据
			self.file.write("@R13\n")
			self.file.write("M=D\n")
			#转移
			self.file.write("@" + str(index) + "\n")
			self.file.write("D=A\n")
			if segment == "local":
				self.file.write("@LCL\n")
			elif segment == "argument":
				self.file.write("@ARG\n")
			elif segment == "this":
				self.file.write("@THIS\n")
			else:
				self.file.write("@THAT\n")
			#相对位置
			self.file.write("D=D+M\n")
			#存储位置
			self.file.write("@R14\n")
			self.file.write("M=D\n")
			#获取数据
			self.file.write("@R13\n")
			self.file.write("D=M\n")
			#转移
			self.file.write("@R14\n")
			self.file.write("A=M\n")
			self.file.write("M=D\n")
		elif segment == "temp":
			self.file.write("@" + str(5 + index) + "\n")
			self.file.write("M=D\n")
		elif segment == "pointer":
			if index == 0:
				self.file.write("@THIS\n")
			elif index == 1:
				self.file.write("@THAT\n")
			self.file.write("M=D\n")
		elif segment == "static":
			self.file.write("@" + self.filename + str(index) + "\n")
			self.file.write("M=D\n")
		
		#增加换行
		self.file.write("\n")
Close
def close(self):
	self.file.close()

主程序

from Parser import Parser
from CodeWriter import CodeWriter
import sys

if __name__ == "__main__":
    if len(sys.argv) != 2:
        print("Error")
        sys.exit(1)
    #划分输入
    data = sys.argv[-1].split("/")
    #文件名
    filename = data[-1]
    #不带后缀的文件名    
    name = filename.split(".")[0]
    #输入
    name1 = './' + '/'.join(data)
    name2 = './' + '/'.join(data[:-1] + [name])
    #构造类
    parser = Parser(name1)
    codewrite = CodeWriter(name2)
    
    while parser.hasMoreCommands():
        parser.advance()
        command = parser.command[0]
        if parser.commandType() == "C_ARITHMETIC":
            codewrite.writeArithmetic(command)
        elif parser.commandType() == "C_PUSH" or parser.commandType() == "C_POP":
            segment = parser.arg1()
            index = parser.arg2()
            codewrite.writePushPop(command, segment, index)
            
    codewrite.close()

注意主程序部分的输入路径格式,这一点要进行处理。